# Project: TV polarization
# Author: Josh Ryan and Jeff Lyons
# Working Date: Winter 23
# Task: Use Imai, Kim, Wang PanelMatch analysis--analysis using ps match, Figure 5
# Updated for PanelMatch 3.1.1

library(foreign)
library(effects)
library(sciplot)
library(sandwich)
library(lmtest)   
library(BMS)
library(rJava)
library(lattice)
library(plyr)
library(lme4)
library(gmodels)
library(MASS)
library(stringr)
library(XLConnect)
library(reshape2)
library(xlsx)
library(dplyr)
library(doBy)
library(Hmisc)
library(stargazer)
library(xtable)
library(foreign)
library(dotwhisker)
library(berryFunctions)
library(ggplot2)
library(gridExtra)
library(wnominate)
library(tidyr)
library(oc)
library(haven)
library(PanelMatch)

#### Figure E8
dat <- read_dta("chamber_level_forstata_v4_102524.dta")
dat <- subset(dat, state_cham==71 | state_cham==72 | state_cham==43 | state_cham==44 | 
                state_cham==57 | state_cham==58 | state_cham==49 | state_cham==50 | 
                state_cham==27 | state_cham==28 | state_cham==5 | state_cham==6 | 
                state_cham==47 | state_cham==48 | state_cham==73 | state_cham==74 | 
                state_cham==39 | state_cham==40 | state_cham==61 | state_cham==62 | 
                state_cham==7 | state_cham==8 | state_cham==51 | state_cham==52 | 
                state_cham==96 | state_cham==54 | state_cham==79 | state_cham==80 |
                state_cham==23 | state_cham==24 | state_cham==69 | state_cham==70 | 
                state_cham==75 | state_cham==76 | state_cham==35 | state_cham==91 | 
                state_cham==92 | state_cham==87 | state_cham==88)

dat <- as.data.frame(dat)
dat$state_cham2 <- paste(dat$state, dat$Chamber, sep = "-")

# ========== LATE BUDGET ANALYSIS ==========
dat.latebudget <- dat[c("late_budget", "treatment", "veto", "mds1", "mds2", "state_ideo", 
                        "ranney", "lngsp", "tl", "lnexp", "citizen_ideo", "lnpop", 
                        "total_intro", "year", "state_cham")]

dat.latebudget <- subset(dat.latebudget, !(is.na(late_budget)))
dat.latebudget$count <- 1
state_cham_freq_df <- aggregate(count ~ state_cham, data = dat.latebudget, FUN = sum)

dat.latebudget <- dat.latebudget %>%
  group_by(state_cham) %>%
  mutate(state_cham_freq = n(),
         state_cham_max = sum(state_cham_freq)) %>%
  ungroup() %>%
  filter(state_cham_freq == 39)

table(dat.latebudget$state_cham_freq)

dat.latebudget <- as.data.frame(dat.latebudget)
dat.latebudget$state_cham <- as.integer(dat.latebudget$state_cham)
dat.latebudget$year <- as.integer(dat.latebudget$year)

# Create PanelData object for late budget
panel_data_latebudget <- PanelData(
  panel.data = dat.latebudget,
  unit.id = "state_cham",
  time.id = "year",
  treatment = "treatment",
  outcome = "late_budget"
)

results.ps.latebudget <- PanelMatch(
  panel.data = panel_data_latebudget,
  lag = 4,
  refinement.method = "ps.weight",
  match.missing = TRUE,
  covs.formula = ~ I(lag(total_intro, 1:4)),
  size.match = 10,
  qoi = "att",
  lead = 0:4,
  forbid.treatment.reversal = TRUE
)

mset.latebudget <- results.ps.latebudget$att
PE.results.latebudget <- PanelEstimate(sets = results.ps.latebudget, panel.data = panel_data_latebudget)
summary_matrix_latebudget <- summary(PE.results.latebudget)

# Extract data properly
coef <- summary_matrix_latebudget[, 1]
se <- summary_matrix_latebudget[, 2]
lower <- summary_matrix_latebudget[, 3]
upper <- summary_matrix_latebudget[, 4]
time <- c(0, 1, 2, 3, 4)

results.latebudget <- data.frame(
  coef = as.numeric(coef),
  se = as.numeric(se),
  lower = as.numeric(lower),
  upper = as.numeric(upper),
  time = time
)

plot.latebudget <- ggplot(results.latebudget, aes(time, coef)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
  theme_classic() +
  scale_y_continuous(breaks = seq(-.2, .2, .04)) +
  coord_cartesian(ylim = c(-.2, .2)) +
  scale_x_continuous(breaks = seq(0, 4, 1)) +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  theme(axis.line.x = element_line(color = "black", size = .5),
        axis.line.y = element_line(color = "black", size = .5)) +
  geom_hline(yintercept = 0, color = "gray") +
  xlab("Time From Adoption (in Years)") + ylab("Estimated Coefficient with CI") +
  ggtitle("Late Budget") + theme(plot.title = element_text(hjust = 0.5))

# ========== KURTOSIS ANALYSIS ==========
dat.kurtosis <- dat[c("budget_kurtosis", "treatment", "veto", "mds1", "mds2", "state_ideo", 
                      "ranney", "lngsp", "tl", "lnexp", "citizen_ideo", "lnpop", 
                      "total_intro", "year", "state_cham")]

dat.kurtosis <- as.data.frame(dat.kurtosis)
dat.kurtosis$state_cham <- as.integer(dat.kurtosis$state_cham)
dat.kurtosis$year <- as.integer(dat.kurtosis$year)

panel_data_kurtosis <- PanelData(
  panel.data = dat.kurtosis,
  unit.id = "state_cham",
  time.id = "year",
  treatment = "treatment",
  outcome = "budget_kurtosis"
)

results.ps.kurtosis <- PanelMatch(
  panel.data = panel_data_kurtosis,
  lag = 4,
  refinement.method = "ps.match",
  match.missing = TRUE,
  covs.formula = ~ I(lag(veto, 1:4)) + I(lag(mds1, 1:4)) + 
    I(lag(mds2, 1:4)) + I(lag(state_ideo, 1:4)) + 
    I(lag(ranney, 1:4)) + I(lag(lngsp, 1:4)) + 
    I(lag(tl, 1:4)) + I(lag(lnexp, 1:4)) + 
    I(lag(citizen_ideo, 1:4)) + I(lag(lnpop, 1:4)) + 
    I(lag(total_intro, 1:4)),
  size.match = 10,
  qoi = "att",
  lead = 0:4,
  forbid.treatment.reversal = TRUE
)

mset.kurtosis <- results.ps.kurtosis$att
PE.results.kurtosis <- PanelEstimate(sets = results.ps.kurtosis, panel.data = panel_data_kurtosis)
summary_matrix_kurtosis <- summary(PE.results.kurtosis)

coef <- summary_matrix_kurtosis[, 1]
se <- summary_matrix_kurtosis[, 2]
lower <- summary_matrix_kurtosis[, 3]
upper <- summary_matrix_kurtosis[, 4]
time <- c(0, 1, 2, 3, 4)

results.kurtosis <- data.frame(
  coef = as.numeric(coef),
  se = as.numeric(se),
  lower = as.numeric(lower),
  upper = as.numeric(upper),
  time = time
)

plot.kurtosis <- ggplot(results.kurtosis, aes(time, coef)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
  theme_classic() +
  scale_y_continuous(breaks = seq(-.2, .2, .04)) +
  coord_cartesian(ylim = c(-.2, .2)) +
  scale_x_continuous(breaks = seq(0, 4, 1)) +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  theme(axis.line.x = element_line(color = "black", size = .5),
        axis.line.y = element_line(color = "black", size = .5)) +
  geom_hline(yintercept = 0, color = "gray") +
  xlab("Time From Adoption (in Years)") + ylab("Estimated Coefficient with CI") +
  ggtitle("Budget Kurtosis") + theme(plot.title = element_text(hjust = 0.5))

# ========== PARTY DIFFERENCES ANALYSIS ==========
dat.diffs <- dat[c("diffs", "treatment", "veto", "mds1", "mds2", "state_ideo", 
                   "ranney", "lngsp", "tl", "lnexp", "citizen_ideo", "lnpop", 
                   "total_intro", "year", "state_cham")]

dat.diffs <- as.data.frame(dat.diffs)
dat.diffs$state_cham <- as.integer(dat.diffs$state_cham)
dat.diffs$year <- as.integer(dat.diffs$year)

panel_data_diffs <- PanelData(
  panel.data = dat.diffs,
  unit.id = "state_cham",
  time.id = "year",
  treatment = "treatment",
  outcome = "diffs"
)

results.ps.diffs <- PanelMatch(
  panel.data = panel_data_diffs,
  lag = 4,
  refinement.method = "ps.match",
  match.missing = TRUE,
  covs.formula = ~ I(lag(veto, 1:4)) + I(lag(mds1, 1:4)) + 
    I(lag(mds2, 1:4)) + I(lag(state_ideo, 1:4)) + 
    I(lag(ranney, 1:4)) + I(lag(lngsp, 1:4)) + 
    I(lag(tl, 1:4)) + I(lag(lnexp, 1:4)) + 
    I(lag(citizen_ideo, 1:4)) + I(lag(lnpop, 1:4)) + 
    I(lag(total_intro, 1:4)),
  size.match = 10,
  qoi = "att",
  lead = 0:4,
  forbid.treatment.reversal = TRUE
)

mset.diff <- results.ps.diffs$att
PE.results.diffs <- PanelEstimate(sets = results.ps.diffs, panel.data = panel_data_diffs)
summary_matrix_diffs <- summary(PE.results.diffs)

coef <- summary_matrix_diffs[, 1]
se <- summary_matrix_diffs[, 2]
lower <- summary_matrix_diffs[, 3]
upper <- summary_matrix_diffs[, 4]
time <- c(0, 1, 2, 3, 4)

results.diffs <- data.frame(
  coef = as.numeric(coef),
  se = as.numeric(se),
  lower = as.numeric(lower),
  upper = as.numeric(upper),
  time = time
)

plot.diffs <- ggplot(results.diffs, aes(time, coef)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
  theme_classic() +
  scale_y_continuous(breaks = seq(-.2, .2, .04)) +
  coord_cartesian(ylim = c(-.2, .2)) +
  scale_x_continuous(breaks = seq(0, 4, 1)) +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  theme(axis.line.x = element_line(color = "black", size = .5),
        axis.line.y = element_line(color = "black", size = .5)) +
  geom_hline(yintercept = 0, color = "gray") +
  xlab("Time From Adoption (in Years)") + ylab("Estimated Coefficient with CI") +
  ggtitle("Inter-party Polarization") + theme(plot.title = element_text(hjust = 0.5))

# ========== DEM SD ANALYSIS ==========
dat.demsd <- dat[c("dem_sd", "treatment", "veto", "mds1", "mds2", "state_ideo", 
                   "ranney", "lngsp", "tl", "lnexp", "citizen_ideo", "lnpop", 
                   "total_intro", "year", "state_cham")]

dat.demsd <- as.data.frame(dat.demsd)
dat.demsd$state_cham <- as.integer(dat.demsd$state_cham)
dat.demsd$year <- as.integer(dat.demsd$year)

panel_data_demsd <- PanelData(
  panel.data = dat.demsd,
  unit.id = "state_cham",
  time.id = "year",
  treatment = "treatment",
  outcome = "dem_sd"
)

results.ps.demsd <- PanelMatch(
  panel.data = panel_data_demsd,
  lag = 4,
  refinement.method = "ps.match",
  match.missing = TRUE,
  covs.formula = ~ I(lag(veto, 1:4)) + I(lag(mds1, 1:4)) + 
    I(lag(mds2, 1:4)) + I(lag(state_ideo, 1:4)) + 
    I(lag(ranney, 1:4)) + I(lag(lngsp, 1:4)) + 
    I(lag(tl, 1:4)) + I(lag(lnexp, 1:4)) + 
    I(lag(citizen_ideo, 1:4)) + I(lag(lnpop, 1:4)) + 
    I(lag(total_intro, 1:4)),
  size.match = 10,
  qoi = "att",
  lead = 0:4,
  forbid.treatment.reversal = TRUE
)

mset.demsd <- results.ps.demsd$att
PE.results.demsd <- PanelEstimate(sets = results.ps.demsd, panel.data = panel_data_demsd)
summary_matrix_demsd <- summary(PE.results.demsd)

coef <- summary_matrix_demsd[, 1]
se <- summary_matrix_demsd[, 2]
lower <- summary_matrix_demsd[, 3]
upper <- summary_matrix_demsd[, 4]
time <- c(0, 1, 2, 3, 4)

results.demsd <- data.frame(
  coef = as.numeric(coef),
  se = as.numeric(se),
  lower = as.numeric(lower),
  upper = as.numeric(upper),
  time = time
)

plot.demsd <- ggplot(results.demsd, aes(time, coef)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
  theme_classic() +
  scale_y_continuous(breaks = seq(-.2, .2, .04)) +
  coord_cartesian(ylim = c(-.2, .2)) +
  scale_x_continuous(breaks = seq(0, 4, 1)) +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  theme(axis.line.x = element_line(color = "black", size = .5),
        axis.line.y = element_line(color = "black", size = .5)) +
  geom_hline(yintercept = 0, color = "gray") +
  xlab("Time From Adoption (in Years)") + ylab("Estimated Coefficient with CI") +
  ggtitle("Democratic Intra-party Polarization") + theme(plot.title = element_text(hjust = 0.5))

# ========== REP SD ANALYSIS ==========
dat.repsd <- dat[c("rep_sd", "treatment", "veto", "mds1", "mds2", "state_ideo", 
                   "ranney", "lngsp", "tl", "lnexp", "citizen_ideo", "lnpop", 
                   "total_intro", "year", "state_cham")]

dat.repsd <- as.data.frame(dat.repsd)
dat.repsd$state_cham <- as.integer(dat.repsd$state_cham)
dat.repsd$year <- as.integer(dat.repsd$year)

panel_data_repsd <- PanelData(
  panel.data = dat.repsd,
  unit.id = "state_cham",
  time.id = "year",
  treatment = "treatment",
  outcome = "rep_sd"
)

results.ps.repsd <- PanelMatch(
  panel.data = panel_data_repsd,
  lag = 4,
  refinement.method = "ps.match",
  match.missing = TRUE,
  covs.formula = ~ I(lag(veto, 1:4)) + I(lag(mds1, 1:4)) + 
    I(lag(mds2, 1:4)) + I(lag(state_ideo, 1:4)) + 
    I(lag(ranney, 1:4)) + I(lag(lngsp, 1:4)) + 
    I(lag(tl, 1:4)) + I(lag(lnexp, 1:4)) + 
    I(lag(citizen_ideo, 1:4)) + I(lag(lnpop, 1:4)) + 
    I(lag(total_intro, 1:4)),
  size.match = 10,
  qoi = "att",
  lead = 0:4,
  forbid.treatment.reversal = TRUE
)

mset.repsd <- results.ps.repsd$att
PE.results.repsd <- PanelEstimate(sets = results.ps.repsd, panel.data = panel_data_repsd)
summary_matrix_repsd <- summary(PE.results.repsd)

coef <- summary_matrix_repsd[, 1]
se <- summary_matrix_repsd[, 2]
lower <- summary_matrix_repsd[, 3]
upper <- summary_matrix_repsd[, 4]
time <- c(0, 1, 2, 3, 4)

results.repsd <- data.frame(
  coef = as.numeric(coef),
  se = as.numeric(se),
  lower = as.numeric(lower),
  upper = as.numeric(upper),
  time = time
)

results.repsd$tscore <- results.repsd$coef / results.repsd$se

plot.repsd <- ggplot(results.repsd, aes(time, coef)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
  theme_classic() +
  scale_y_continuous(breaks = seq(-.2, .2, .04)) +
  coord_cartesian(ylim = c(-.2, .2)) +
  scale_x_continuous(breaks = seq(0, 4, 1)) +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  theme(axis.line.x = element_line(color = "black", size = .5),
        axis.line.y = element_line(color = "black", size = .5)) +
  geom_hline(yintercept = 0, color = "gray") +
  xlab("Time From Adoption (in Years)") + ylab("Estimated Coefficient with CI") +
  ggtitle("Republican Intra-party Polarization") + theme(plot.title = element_text(hjust = 0.5))

# ========== LEGISLATIVE PRODUCTIVITY ANALYSIS ==========
dat.prod <- dat[c("billprop", "treatment", "veto", "mds1", "mds2", "state_ideo", 
                  "ranney", "lngsp", "tl", "lnexp", "citizen_ideo", "lnpop", 
                  "total_intro", "year", "state_cham")]

dat.prod <- as.data.frame(dat.prod)
dat.prod$state_cham <- as.integer(dat.prod$state_cham)
dat.prod$year <- as.integer(dat.prod$year)

panel_data_prod <- PanelData(
  panel.data = dat.prod,
  unit.id = "state_cham",
  time.id = "year",
  treatment = "treatment",
  outcome = "billprop"
)

results.ps.prod <- PanelMatch(
  panel.data = panel_data_prod,
  lag = 4,
  refinement.method = "ps.match",
  match.missing = TRUE,
  covs.formula = ~ I(lag(veto, 1:4)) + I(lag(mds1, 1:4)) + 
    I(lag(mds2, 1:4)) + I(lag(state_ideo, 1:4)) + 
    I(lag(ranney, 1:4)) + I(lag(lngsp, 1:4)) + 
    I(lag(tl, 1:4)) + I(lag(lnexp, 1:4)) + 
    I(lag(citizen_ideo, 1:4)) + I(lag(lnpop, 1:4)) + 
    I(lag(total_intro, 1:4)),
  size.match = 10,
  qoi = "att",
  lead = 0:4,
  forbid.treatment.reversal = TRUE
)

mset.prod <- results.ps.prod$att
PE.results.prod <- PanelEstimate(sets = results.ps.prod, panel.data = panel_data_prod)
summary_matrix_prod <- summary(PE.results.prod)

coef <- summary_matrix_prod[, 1]
se <- summary_matrix_prod[, 2]
lower <- summary_matrix_prod[, 3]
upper <- summary_matrix_prod[, 4]
time <- c(0, 1, 2, 3, 4)

results.prod <- data.frame(
  coef = as.numeric(coef),
  se = as.numeric(se),
  lower = as.numeric(lower),
  upper = as.numeric(upper),
  time = time
)

plot.prod <- ggplot(results.prod, aes(time, coef)) +
  geom_point() +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0) +
  theme_classic() +
  scale_y_continuous(breaks = seq(-.2, .2, .04)) +
  coord_cartesian(ylim = c(-.2, .2)) +
  scale_x_continuous(breaks = seq(0, 4, 1)) +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  theme(axis.line.x = element_line(color = "black", size = .5),
        axis.line.y = element_line(color = "black", size = .5)) +
  geom_hline(yintercept = 0, color = "gray") +
  xlab("Time From Adoption (in Years)") + ylab("Estimated Coefficient with CI") +
  ggtitle("Legislative Productivity") + theme(plot.title = element_text(hjust = 0.5))

# ========== COMBINE ALL PLOTS ==========
all.plots <- grid.arrange(plot.latebudget, plot.kurtosis, plot.diffs, 
                          plot.demsd, plot.repsd, plot.prod, ncol = 3)

# ========== FREQUENCY DISTRIBUTION PLOTS ==========
par(mfrow = c(2, 3))
plot(mset.latebudget, xlim = c(0, 100), ylim = c(0, 25), main = "Late Budget")
plot(mset.kurtosis, xlim = c(0, 100), ylim = c(0, 25), main = "Budget Kurtosis")
plot(mset.diff, xlim = c(0, 100), ylim = c(0, 25), main = "Inter-Party Polarization")
plot(mset.demsd, xlim = c(0, 100), ylim = c(0, 25), main = "Dem. Intra-Party Polarization")
plot(mset.repsd, xlim = c(0, 100), ylim = c(0, 25), main = "GOP Intra-Party Polarization")
plot(mset.prod, xlim = c(0, 100), ylim = c(0, 25), main = "Leg. Productivity")